package com.huan.hadoop.mr;

import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

import java.util.Map;

/**
 * Custom partitioner that assigns map output records to partitions by web site.
 *
 * <p>The type parameters of {@code Partitioner<K, V>} are the key and value types
 * emitted by the map phase. Here both are {@link Text}, and the key is looked up
 * as a web site name in a fixed table.
 *
 * @author huan.fu
 * @date 2023/7/12 - 20:12
 */
public class WebSitePartitioner extends Partitioner<Text, Text> implements Configurable {

    /** Partition index used for any web site that has no entry in the table. */
    private static final int DEFAULT_PARTITION = 0;

    /** Web site name to partition index; filled once in the static initializer below. */
    private static final Map<String, Integer> webSitePartitionMap = Maps.newHashMap();

    static {
        webSitePartitionMap.put("www.baidu.com", 0);
        webSitePartitionMap.put("www.google.com", 1);
    }

    /** Configuration stored by {@link #setConf(Configuration)} and returned by {@link #getConf()}. */
    private Configuration configuration;

    /**
     * Picks the partition for a map output record from its key.
     *
     * @param mapOutKey     map output key; its string form is used as the web site name
     * @param mapOutValue   map output value; not consulted
     * @param numPartitions total number of partitions available
     * @return the table index for the web site (or {@link #DEFAULT_PARTITION} when the
     *         site is not in the table), folded into {@code [0, numPartitions)} whenever
     *         {@code numPartitions} is positive
     */
    @Override
    public int getPartition(Text mapOutKey, Text mapOutValue, int numPartitions) {
        String webSite = mapOutKey.toString();
        int partition = webSitePartitionMap.getOrDefault(webSite, DEFAULT_PARTITION);
        // The table is hard-coded for two partitions. Fold the index so that running with
        // fewer partitions never yields an index outside [0, numPartitions). With two or
        // more partitions the result is unchanged. A non-positive count leaves the table
        // value untouched, which avoids a division by zero.
        return numPartitions > 0 ? partition % numPartitions : partition;
    }

    /**
     * Stores the given configuration on this partitioner.
     *
     * @param conf configuration to keep
     */
    @Override
    public void setConf(Configuration conf) {
        this.configuration = conf;
    }

    /**
     * Returns the configuration last passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none has been set
     */
    @Override
    public Configuration getConf() {
        return configuration;
    }
}
